Detecting Brain Tumors¶

Imports¶

In [ ]:
import os
import numpy as np
import pandas as pd
from pandas.plotting import scatter_matrix
import matplotlib.pyplot as plt
import seaborn as sns
import random
import shutil

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import tensorflow as tf
In [ ]:
# Create the output directory for plots.
# exist_ok=True makes this idempotent and avoids the explicit existence
# check (which is also racy between check and create).
os.makedirs('./plots', exist_ok=True)

Load Dataset¶

In [ ]:
# Load the tabular brain-tumor feature dataset (one row of image-derived
# features per MRI image).
dff = pd.read_csv('data/Brain_Tumor.csv')
In [ ]:
# Preview the first five rows to sanity-check columns and values.
dff.head()
Out[ ]:
Image Class Mean Variance Standard Deviation Entropy Skewness Kurtosis Contrast Energy ASM Homogeneity Dissimilarity Correlation Coarseness
0 Image1 0 6.535339 619.587845 24.891522 0.109059 4.276477 18.900575 98.613971 0.293314 0.086033 0.530941 4.473346 0.981939 7.458341e-155
1 Image2 0 8.749969 805.957634 28.389393 0.266538 3.718116 14.464618 63.858816 0.475051 0.225674 0.651352 3.220072 0.988834 7.458341e-155
2 Image3 1 7.341095 1143.808219 33.820234 0.001467 5.061750 26.479563 81.867206 0.031917 0.001019 0.268275 5.981800 0.978014 7.458341e-155
3 Image4 1 5.958145 959.711985 30.979219 0.001477 5.677977 33.428845 151.229741 0.032024 0.001026 0.243851 7.700919 0.964189 7.458341e-155
4 Image5 0 7.315231 729.540579 27.010009 0.146761 4.283221 19.079108 174.988756 0.343849 0.118232 0.501140 6.834689 0.972789 7.458341e-155
In [ ]:
# Summary statistics.  Note Coarseness has std == 0 (constant column), so it
# carries no discriminative information.
dff.describe()
Out[ ]:
Class Mean Variance Standard Deviation Entropy Skewness Kurtosis Contrast Energy ASM Homogeneity Dissimilarity Correlation Coarseness
count 3762.000000 3762.000000 3762.000000 3762.000000 3762.000000 3762.000000 3762.000000 3762.000000 3762.000000 3762.000000 3762.000000 3762.000000 3762.000000 3.762000e+03
mean 0.447368 9.488890 711.101063 25.182271 0.073603 4.102727 24.389071 127.961459 0.204705 0.058632 0.479252 4.698498 0.955767 7.458341e-155
std 0.497288 5.728022 467.466896 8.773526 0.070269 2.560940 56.434747 109.499601 0.129352 0.058300 0.127929 1.850173 0.026157 0.000000e+00
min 0.000000 0.078659 3.145628 1.773592 0.000882 1.886014 3.942402 3.194733 0.024731 0.000612 0.105490 0.681121 0.549426 7.458341e-155
25% 0.000000 4.982395 363.225459 19.058475 0.006856 2.620203 7.252852 72.125208 0.069617 0.004847 0.364973 3.412363 0.947138 7.458341e-155
50% 0.000000 8.477531 622.580417 24.951560 0.066628 3.422210 12.359088 106.737418 0.225496 0.050849 0.512551 4.482404 0.961610 7.458341e-155
75% 1.000000 13.212723 966.954319 31.095889 0.113284 4.651737 22.640304 161.059006 0.298901 0.089342 0.575557 5.723821 0.971355 7.458341e-155
max 1.000000 33.239975 2910.581879 53.949809 0.394539 36.931294 1371.640060 3382.574163 0.589682 0.347725 0.810921 27.827751 0.989972 7.458341e-155
In [ ]:
# drop Image feature
# drop Image feature (it is an identifier, not a predictor)
df = dff.drop(columns=['Image'])
In [ ]:
# Confirm the remaining columns: Class label plus 13 numeric features.
df.columns
Out[ ]:
Index(['Class', 'Mean', 'Variance', 'Standard Deviation', 'Entropy',
       'Skewness', 'Kurtosis', 'Contrast', 'Energy', 'ASM', 'Homogeneity',
       'Dissimilarity', 'Correlation', 'Coarseness'],
      dtype='object')

Data Exploration¶

In [ ]:
# Histograms of all 13 numeric features (columns 1..13; column 0 is Class).
df[df.columns[1:14]].hist(alpha=0.8, figsize=(20, 20))
plt.savefig('plots/distributions.pdf')
No description has been provided for this image
In [ ]:
# Per-feature histograms split by tumor class (one subplot per feature).
size = (20, 15)

for i in range(1, 14):
    plt.subplot(4, 4, i)
    df.groupby('Class')[df.columns[i]].hist(alpha=0.6, figsize=size)
    # groupby plots classes in ascending order (0 first).  Per the image
    # directory labels, 0 = Benign and 1 = Malignant, so the legend lists
    # benign first.  NOTE(review): the original legend was
    # ['malignant', 'benign'], which appears inverted — confirm encoding.
    plt.legend(['benign', 'malignant'])
    plt.xlabel(df.columns[i])
    plt.tight_layout()

# Save once after all subplots are drawn.  (Originally savefig ran inside
# the loop, rewriting the same PDF on all 13 iterations.)
plt.savefig('plots/benign_malignant_comparison.pdf')
No description has been provided for this image
In [ ]:
# Pairwise scatter matrix of all 13 features, point color = tumor class.
scatter_matrix(df[df.columns[1:14]], c=df['Class'], alpha=0.8, figsize=(30, 30), s=20)
plt.tight_layout()
# plt.show()
plt.savefig('plots/scatter_matrix.pdf', dpi=50)
No description has been provided for this image
In [ ]:
# Correlation heatmap over Class + the first 12 features (columns 0..12).
# Coarseness is excluded — it is constant (std == 0), so its correlations
# would be undefined (NaN).
plt.figure(figsize=(20,20))
sns.heatmap(df[df.columns[0:13]].corr(), annot=True, square=True, cmap='coolwarm')
plt.tight_layout()
# plt.show()
plt.savefig('plots/correlation.pdf')
No description has been provided for this image

Preprocessing¶

In [ ]:
# test-train split
# test-train split: 80/20, fixed seed for reproducibility.
# NOTE(review): consider stratify=y so both splits keep the ~45/55 class
# balance seen in describe().
X, y = np.array(df.iloc[:, 1:14]), df['Class']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=123)
In [ ]:
# scaling
# scaling: fit the scaler on the training set only, then apply the same
# transform to the test set (avoids data leakage from the test split).
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

Models¶

SVM¶

In [ ]:
# Support-vector classifier with default hyperparameters (RBF kernel),
# trained on the scaled tabular features.
svm_clf = SVC()
svm_clf.fit(X_train, y_train)
svm_pred = svm_clf.predict(X_test)
In [ ]:
# SVM test-set accuracy.
print('Accuracy: %.4f' % accuracy_score(y_test, svm_pred))
Accuracy: 0.9801

Logistic Regression¶

In [ ]:
# Logistic Regression
lr = LogisticRegression(random_state=42)
lr.fit(X_train, y_train)
lr_pred = lr.predict(X_test)
In [ ]:
# Logistic-regression test-set accuracy.
print('Accuracy: %.4f' % accuracy_score(y_test, lr_pred))
Accuracy: 0.9788

kNN¶

In [ ]:
# kNN
knn = KNeighborsClassifier()
knn.fit(X_train, y_train)
knn_pred = knn.predict(X_test)
In [ ]:
# kNN test-set accuracy.
print('Accuracy: %.4f' % accuracy_score(y_test, knn_pred))
Accuracy: 0.9761

CNN¶

Load Images¶

In [ ]:
# One-time reorganization: move files from the flat Brain_Tumor directory
# into Benign/ and Malignant/ subdirectories, as required by
# tf.keras.utils.image_dataset_from_directory.  Kept commented out so a
# full re-run does not fail after the files have already been moved.
#for i in range(0, 3762):
#    if df['Class'][i]==0:
#        shutil.move(f'data/Brain_Tumor/Brain_Tumor/Image{i+1}.jpg', f'data/Brain_Tumor/Benign/Image{i+1}.jpg')
#    else:
#        shutil.move(f'data/Brain_Tumor/Brain_Tumor/Image{i+1}.jpg', f'data/Brain_Tumor/Malignant/Image{i+1}.jpg')
In [ ]:
# Image-loading configuration.
img_height = 240
img_width = 240
batch_size = 32
data_dir = 'data/Brain_Tumor'

# get train dataset: 80% of the labeled images.  The seed must match the
# validation cell below so the two subsets are disjoint and consistent.
train_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)
Found 3010 files belonging to 2 classes.
Using 2408 files for training.
In [ ]:
# get validation dataset
val_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size)
Found 3010 files belonging to 2 classes.
Using 602 files for validation.
In [ ]:
# get test dataset
test_ds = tf.keras.utils.image_dataset_from_directory(
    'data/test_data',
    shuffle=False,
    image_size=(img_height, img_width),
    batch_size=batch_size)
Found 752 files belonging to 2 classes.
In [ ]:
# Class names are inferred from the subdirectory names (alphabetical):
# index 0 = Benign, index 1 = Malignant.
class_names = train_ds.class_names
print(class_names)
['Benign', 'Malignant']
In [ ]:
# Show a 3x3 grid of sample training images with their class labels.
plt.figure(figsize=(10, 10))
for images, labels in train_ds.take(1):
  for i in range(9):
    ax = plt.subplot(3, 3, i + 1)
    plt.imshow(images[i].numpy().astype("uint8"))
    plt.title(class_names[labels[i]])
    plt.axis("off")
# Save once after the full grid is drawn.  (Originally savefig ran inside
# the inner loop, rewriting the same PDF on all 9 iterations.)
plt.savefig('plots/tumor_images.pdf')
2024-07-24 12:48:45.061036: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2408]
	 [[{{node Placeholder/_4}}]]
2024-07-24 12:48:45.061500: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [2408]
	 [[{{node Placeholder/_4}}]]
No description has been provided for this image
In [ ]:
# Baseline CNN: three conv+pool stages with decreasing filter counts,
# dropout for regularization, and a small dense head.  The final Dense(2)
# outputs raw logits (matched by from_logits=True in the loss below).
CNN = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1./255),  # scale pixel values into [0, 1]
    tf.keras.layers.Conv2D(24, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(12, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    # tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Conv2D(8, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),  # regularization before the dense head
    tf.keras.layers.Dense(4, activation='relu'),
    tf.keras.layers.Dense(2)  # two-class logits
])

optimizer = tf.optimizers.Adam(learning_rate=0.005)
CNN.compile(
  optimizer=optimizer,
  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
  metrics=['accuracy'])
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`.
WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
In [ ]:
# Train the CNN for 30 epochs; the returned History object is plotted later.
CNN_hist = CNN.fit(
    train_ds,
    validation_data=val_ds,
    epochs=30
)
Epoch 1/30
76/76 [==============================] - ETA: 0s - loss: 0.4832 - accuracy: 0.7961
2024-07-24 12:49:12.673031: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [602]
	 [[{{node Placeholder/_4}}]]
2024-07-24 12:49:12.673217: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [602]
	 [[{{node Placeholder/_4}}]]
76/76 [==============================] - 17s 229ms/step - loss: 0.4832 - accuracy: 0.7961 - val_loss: 0.4617 - val_accuracy: 0.8123
Epoch 2/30
76/76 [==============================] - 17s 222ms/step - loss: 0.4312 - accuracy: 0.8227 - val_loss: 0.4096 - val_accuracy: 0.8571
Epoch 3/30
76/76 [==============================] - 17s 226ms/step - loss: 0.4072 - accuracy: 0.8447 - val_loss: 0.3809 - val_accuracy: 0.8771
Epoch 4/30
76/76 [==============================] - 17s 222ms/step - loss: 0.3701 - accuracy: 0.8468 - val_loss: 0.3521 - val_accuracy: 0.8804
Epoch 5/30
76/76 [==============================] - 17s 219ms/step - loss: 0.3209 - accuracy: 0.8783 - val_loss: 0.3345 - val_accuracy: 0.9037
Epoch 6/30
76/76 [==============================] - 17s 218ms/step - loss: 0.2902 - accuracy: 0.8866 - val_loss: 0.3059 - val_accuracy: 0.8837
Epoch 7/30
76/76 [==============================] - 19s 252ms/step - loss: 0.2536 - accuracy: 0.9037 - val_loss: 0.2663 - val_accuracy: 0.9103
Epoch 8/30
76/76 [==============================] - 18s 236ms/step - loss: 0.2587 - accuracy: 0.8978 - val_loss: 0.2503 - val_accuracy: 0.9136
Epoch 9/30
76/76 [==============================] - 21s 278ms/step - loss: 0.2147 - accuracy: 0.9257 - val_loss: 0.2597 - val_accuracy: 0.9086
Epoch 10/30
76/76 [==============================] - 18s 229ms/step - loss: 0.1947 - accuracy: 0.9294 - val_loss: 0.2579 - val_accuracy: 0.9269
Epoch 11/30
76/76 [==============================] - 17s 227ms/step - loss: 0.1673 - accuracy: 0.9373 - val_loss: 0.2659 - val_accuracy: 0.9020
Epoch 12/30
76/76 [==============================] - 18s 230ms/step - loss: 0.1583 - accuracy: 0.9406 - val_loss: 0.2383 - val_accuracy: 0.9385
Epoch 13/30
76/76 [==============================] - 18s 231ms/step - loss: 0.1545 - accuracy: 0.9435 - val_loss: 0.2508 - val_accuracy: 0.9120
Epoch 14/30
76/76 [==============================] - 17s 228ms/step - loss: 0.1321 - accuracy: 0.9522 - val_loss: 0.2422 - val_accuracy: 0.9219
Epoch 15/30
76/76 [==============================] - 18s 237ms/step - loss: 0.1261 - accuracy: 0.9522 - val_loss: 0.2567 - val_accuracy: 0.9252
Epoch 16/30
76/76 [==============================] - 18s 231ms/step - loss: 0.1317 - accuracy: 0.9498 - val_loss: 0.2468 - val_accuracy: 0.9219
Epoch 17/30
76/76 [==============================] - 19s 256ms/step - loss: 0.1308 - accuracy: 0.9556 - val_loss: 0.2254 - val_accuracy: 0.9269
Epoch 18/30
76/76 [==============================] - 19s 253ms/step - loss: 0.1240 - accuracy: 0.9514 - val_loss: 0.2364 - val_accuracy: 0.9219
Epoch 19/30
76/76 [==============================] - 18s 240ms/step - loss: 0.1063 - accuracy: 0.9622 - val_loss: 0.2182 - val_accuracy: 0.9302
Epoch 20/30
76/76 [==============================] - 18s 232ms/step - loss: 0.0902 - accuracy: 0.9647 - val_loss: 0.2117 - val_accuracy: 0.9302
Epoch 21/30
76/76 [==============================] - 18s 230ms/step - loss: 0.0770 - accuracy: 0.9784 - val_loss: 0.2503 - val_accuracy: 0.9336
Epoch 22/30
76/76 [==============================] - 17s 228ms/step - loss: 0.0882 - accuracy: 0.9697 - val_loss: 0.2033 - val_accuracy: 0.9336
Epoch 23/30
76/76 [==============================] - 18s 230ms/step - loss: 0.0727 - accuracy: 0.9759 - val_loss: 0.2681 - val_accuracy: 0.9219
Epoch 24/30
76/76 [==============================] - 18s 230ms/step - loss: 0.0688 - accuracy: 0.9763 - val_loss: 0.2619 - val_accuracy: 0.9319
Epoch 25/30
76/76 [==============================] - 18s 232ms/step - loss: 0.1160 - accuracy: 0.9564 - val_loss: 0.2893 - val_accuracy: 0.9269
Epoch 26/30
76/76 [==============================] - 18s 235ms/step - loss: 0.0900 - accuracy: 0.9705 - val_loss: 0.2947 - val_accuracy: 0.9203
Epoch 27/30
76/76 [==============================] - 18s 236ms/step - loss: 0.0822 - accuracy: 0.9738 - val_loss: 0.2349 - val_accuracy: 0.9269
Epoch 28/30
76/76 [==============================] - 18s 230ms/step - loss: 0.0662 - accuracy: 0.9784 - val_loss: 0.2949 - val_accuracy: 0.9302
Epoch 29/30
76/76 [==============================] - 18s 235ms/step - loss: 0.0695 - accuracy: 0.9821 - val_loss: 0.2012 - val_accuracy: 0.9336
Epoch 30/30
76/76 [==============================] - 18s 241ms/step - loss: 0.0691 - accuracy: 0.9730 - val_loss: 0.2392 - val_accuracy: 0.9336
In [ ]:
# Persist the trained CNN (TensorFlow SavedModel format).
# exist_ok=True makes directory creation idempotent, replacing the
# check-then-create pattern.
os.makedirs('./models', exist_ok=True)

CNN.save('./models/CNN')
2024-07-23 18:56:42.882581: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,6272]
	 [[{{node inputs}}]]
2024-07-23 18:56:43.006320: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,6272]
	 [[{{node inputs}}]]
WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op while saving (showing 3 of 3). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: ./models/CNN/assets
INFO:tensorflow:Assets written to: ./models/CNN/assets

Evaluation¶

In [ ]:
import itertools
from sklearn.metrics import r2_score, classification_report, mean_squared_error, mean_absolute_error, confusion_matrix



def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    Plot a confusion matrix as a heatmap with per-cell value labels.

    Parameters
    ----------
    cm : array-like of shape (n_classes, n_classes)
        Confusion matrix, e.g. from sklearn.metrics.confusion_matrix.
    classes : sequence of str
        Tick labels, in the same order as the matrix rows/columns.
    normalize : bool
        If True, each row is normalized to sum to 1 before plotting.
    title : str
        Figure title.
    cmap : matplotlib colormap
        Colormap for the heatmap.
    """
    # Normalize BEFORE drawing so the heatmap colors and the overlaid cell
    # text agree.  (Originally normalization ran after plt.imshow, so the
    # image showed raw counts while the text showed fractions.)
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    # Label each cell, switching text color at half the maximum so the
    # numbers stay readable on both dark and light cells.
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, cm[i, j],
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')


def evaluate_model(test, predict, model):
    """Plot a confusion matrix and a class-count histogram for one model.

    Parameters
    ----------
    test : array-like
        True class labels (0/1).
    predict : array-like
        Predicted class labels (0/1).
    model : str
        Model name used in plot titles and output file names.
    """
    conf_mtx = confusion_matrix(test, predict)
    # The image directories map 0 -> Benign and 1 -> Malignant, so the tick
    # labels list benign first.  NOTE(review): the original labels
    # ['malignant', 'benign'] appear inverted — confirm the class encoding.
    plot_confusion_matrix(cm=conf_mtx, classes=['benign', 'malignant'],
                          title=f'Confusion matrix for {model}')
    plt.savefig(f'plots/confusion_matrix_{model}.pdf')
    plt.show()

    # (Removed a no-op `predict.reshape(-1)` whose result was discarded.)

    # True labels as filled bars, predictions as a step outline on top.
    plt.hist(test, alpha=0.5, color='red', range=[0, 1], bins=2)
    plt.hist(predict, alpha=0.5, color='red', range=[0, 1], bins=2,
             histtype='step', linewidth=2)
    plt.tight_layout()
    plt.xlabel('Class')
    plt.ylabel('Number')
    plt.xticks([0, 1], ['benign', 'malignant'])
    plt.title(f'Prediction distribution for {model}')
    # Legend order now matches draw order: true labels first, predictions
    # second (originally swapped).
    plt.legend(['testing true', 'testing prediction'], loc='upper right')
    plt.savefig(f'plots/histogram_{model}.pdf')
    plt.show()


def false_predictions(test, predict, model):
    """Show up to six misclassified samples with their predicted labels.

    Parameters
    ----------
    test : array-like
        True class labels.
    predict : array-like
        Predicted class labels.
    model : str
        Model name used in the title and output file name.

    NOTE(review): this displays ``images[i]`` from the module-level
    ``images`` variable (the last batch taken from train_ds), not the
    misclassified sample at index ``err`` of the evaluated dataset — the
    pictures shown do not correspond to the predictions.  TODO: pass the
    evaluated images in and index them by ``err``.
    """
    # Indices of the first (up to) six misclassifications.
    errors = np.where(test != predict)[0][:6]
    fig, axs = plt.subplots(2, 3, figsize=(12, 8))
    axs = axs.ravel()

    for i, err in enumerate(errors):
        axs[i].imshow(images[i].numpy().astype("uint8"), cmap='gray')
        # Directory order maps 0 -> Benign, 1 -> Malignant.  NOTE(review):
        # the original mapping (0 -> "Malignant") looks inverted — confirm.
        if predict[err] == 0:
            axs[i].set_title("Benign")
        else:
            axs[i].set_title("Malignant")
        axs[i].axis('off')

    fig.tight_layout()
    # Bug fix: the original used a plain string '{model} False Predictions'
    # (missing the f prefix), so the literal braces appeared in the title.
    plt.title(f'{model} False Predictions')
    plt.savefig(f'plots/false_predictions_{model}.pdf')
    plt.show()


def adv_classifications(test, predict):
    """Print a classification report plus error metrics for one model.

    Parameters
    ----------
    test : array-like
        True class labels.
    predict : array-like
        Predicted class labels.
    """
    print(classification_report(test, predict))
    print('Coefficient of determination: %.4f' % r2_score(test, predict))
    # The mean squared error
    print("Mean squared error: %.4f" % mean_squared_error(test, predict))
    # The mean absolute error (the original comment repeated "squared")
    print("Mean absolute error: %.4f" % mean_absolute_error(test, predict))


def full_evaluation(test, predict, model, images=False):
    """Run the full evaluation suite for one model.

    Parameters
    ----------
    test : array-like
        True class labels.
    predict : array-like
        Predicted class labels.
    model : str
        Model name used in plot titles and file names.
    images : bool
        If True, additionally plot misclassified sample images.
    """
    print('Accuracy: %.4f' % accuracy_score(test, predict))
    adv_classifications(test, predict)
    evaluate_model(test, predict, model)
    # Idiomatic truthiness check (originally `if images == True :`).
    if images:
        false_predictions(test, predict, model)

SVM¶

In [ ]:
# Full evaluation for the SVM on the tabular test split.
full_evaluation(y_test, svm_pred, 'SVM')
Accuracy: 0.9801
              precision    recall  f1-score   support

           0       0.97      1.00      0.98       414
           1       0.99      0.96      0.98       339

    accuracy                           0.98       753
   macro avg       0.98      0.98      0.98       753
weighted avg       0.98      0.98      0.98       753

Coefficient of determination: 0.9195
Mean squared error: 0.0199
Mean absolute error: 0.0199
No description has been provided for this image
No description has been provided for this image

Logistic Regression¶

In [ ]:
# Full evaluation for Logistic Regression on the tabular test split.
# evaluate_model(y_test, lr_pred, 'LR')
full_evaluation(y_test, lr_pred, 'LR')
Accuracy: 0.9788
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       414
           1       0.99      0.96      0.98       339

    accuracy                           0.98       753
   macro avg       0.98      0.98      0.98       753
weighted avg       0.98      0.98      0.98       753

Coefficient of determination: 0.9142
Mean squared error: 0.0212
Mean absolute error: 0.0212
No description has been provided for this image
No description has been provided for this image

KNN¶

In [ ]:
# Full evaluation for kNN on the tabular test split.
# evaluate_model(y_test, knn_pred, 'kNN')
full_evaluation(y_test, knn_pred, 'kNN')
Accuracy: 0.9761
              precision    recall  f1-score   support

           0       0.96      1.00      0.98       414
           1       1.00      0.95      0.97       339

    accuracy                           0.98       753
   macro avg       0.98      0.97      0.98       753
weighted avg       0.98      0.98      0.98       753

Coefficient of determination: 0.9034
Mean squared error: 0.0239
Mean absolute error: 0.0239
No description has been provided for this image
No description has been provided for this image

CNN¶

In [ ]:
# Reload the saved CNN from disk (also verifies the SavedModel round-trip).
CNN_loaded = tf.keras.models.load_model("models/CNN")
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.RestoredOptimizer` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.RestoredOptimizer`.
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`.
WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
In [ ]:
def plot_history(network_history, model_name='CNN'):
    """Plot training/validation loss and accuracy curves from a fit history.

    Parameters
    ----------
    network_history : keras History
        The object returned by ``model.fit()``; its ``history`` dict must
        contain 'loss', 'val_loss', 'accuracy' and 'val_accuracy'.
    model_name : str
        Name used in the output file name (default 'CNN').  Bug fix: the
        original interpolated the History object itself into the filename,
        producing names like
        '<keras.callbacks.History object at 0x...>-history.pdf'.
    """
    fig, axs = plt.subplots(2, 1, figsize=(10, 10))

    axs[0].plot(network_history.history['loss'])
    axs[0].plot(network_history.history['val_loss'])
    axs[0].set_title('Model Loss')
    axs[0].set_ylabel('Loss')
    axs[0].set_xlabel('Epoch')
    axs[0].legend(['train', 'validation'], loc='upper right')

    axs[1].plot(network_history.history['accuracy'])
    axs[1].plot(network_history.history['val_accuracy'])
    axs[1].set_title('Model Accuracy')
    axs[1].set_ylabel('Accuracy')
    axs[1].set_xlabel('Epoch')
    axs[1].legend(['train', 'validation'], loc='lower right')
    plt.tight_layout()
    plt.savefig(f'plots/{model_name}-history.pdf')
In [ ]:
# Plot the CNN training curves.
plot_history(CNN_hist)
No description has been provided for this image
In [ ]:
# Predict on the image test set; argmax over the two class logits yields
# hard 0/1 predictions.
cnn_pred = CNN_loaded.predict(test_ds)
cnn_pred = cnn_pred.argmax(axis=1)

# Build the true labels.  test_ds was loaded with shuffle=False, so files
# come in directory order: the first 336 are Benign (0), the remaining
# 416 are Malignant (1).  NOTE(review): counts are hard-coded — confirm
# against data/test_data.  (The original also redundantly assigned 0 into
# an array already created with np.zeros.)
test_labels = np.concatenate([np.zeros(336), np.ones(752 - 336)])
print('Accuracy: %.4f' % accuracy_score(test_labels, cnn_pred))
 1/24 [>.............................] - ETA: 2s
2024-07-24 12:57:56.338696: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [752]
	 [[{{node Placeholder/_4}}]]
2024-07-24 12:57:56.338906: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_4' with dtype int32 and shape [752]
	 [[{{node Placeholder/_4}}]]
24/24 [==============================] - 2s 94ms/step
Accuracy: 0.9229
In [ ]:
# Full evaluation for the CNN on the image test set.
full_evaluation(test_labels, cnn_pred, 'CNN')
Accuracy: 0.9229
              precision    recall  f1-score   support

         0.0       0.86      0.99      0.92       336
         1.0       0.99      0.87      0.93       416

    accuracy                           0.92       752
   macro avg       0.92      0.93      0.92       752
weighted avg       0.93      0.92      0.92       752

Coefficient of determination: 0.6880
Mean squared error: 0.0771
Mean absolute error: 0.0771
No description has been provided for this image
No description has been provided for this image
In [ ]:
from sklearn.metrics import roc_curve, auc

# ROC curve for the CNN.
# NOTE(review): this is computed from hard argmax predictions, so the
# "curve" has a single operating point; using the predicted
# probabilities/logits would give a proper threshold sweep.
fpr, tpr, thresholds = roc_curve(test_labels, cnn_pred)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.savefig('plots/roc_curve.pdf')
plt.show()
No description has been provided for this image

Hyperparameter optimization with Gridsearch¶

In [ ]:
from tensorflow.keras.callbacks import ModelCheckpoint, Callback
from tensorflow.keras.models import Sequential, load_model


# Grid search over CNN hyperparameters: conv filter count, dense-layer
# width, and dropout rate.  For each configuration the best epoch (by
# validation accuracy) is checkpointed to disk, then reloaded to measure
# training metrics at that epoch.
os.makedirs('./gridsearch', exist_ok=True)

search_results = []

filters_candidates = [24, 36, 48, 60, 72]
dense_candidates = [4, 8, 12, 16, 20, 24]
dropout_candidates = [.4, .5, .6]

for nb_filters in filters_candidates:
  for nb_dense in dense_candidates:
    for dropout in dropout_candidates:

      print(f"Start training for (filters={nb_filters} - dense={nb_dense} - dropout={dropout})")

      ########################################
      # Same architecture as the baseline CNN, with the searched
      # hyperparameters substituted in; later conv layers use 1/2 and 1/3
      # of the first layer's filter count.
      model = tf.keras.Sequential([
        tf.keras.layers.Rescaling(1./255),
        tf.keras.layers.Conv2D(nb_filters, 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(int(nb_filters/2), 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Conv2D(int(nb_filters/3), 3, activation='relu'),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dropout(dropout),
        tf.keras.layers.Dense(nb_dense, activation='relu'),
        tf.keras.layers.Dense(2)
      ])

      optimizer = tf.optimizers.Adam()
      model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy'])


      # we choose our best model as the one having the highest validation accuracy
      filepath = f"./gridsearch/cnn_paramsearch_filters_f={nb_filters}_dn={nb_dense}_do={dropout}.hdf5"
      checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy', verbose=0, save_best_only=True, mode='max')

      fit_results = model.fit(
        train_ds,
        validation_data=val_ds,
        batch_size=batch_size,
        # reduced number of epochs for speed reasons --> should be higher!
        epochs=30,
        verbose=0,
        callbacks=[checkpoint],
      )

      # extract the best validation scores
      best_val_epoch    = np.argmax(fit_results.history['val_accuracy'])
      best_val_acc      = np.max(fit_results.history['val_accuracy'])
      best_val_acc_loss = fit_results.history['val_loss'][best_val_epoch]

      # reload the checkpointed (best) weights so the training metrics
      # below correspond to the best-validation epoch, not the final one
      best_model = load_model(filepath)

      # (Removed a dead `test_labels` construction that was never used in
      # this loop — the labels are built where needed, in the CNN
      # evaluation cell.)

      best_val_acc_train_loss, best_val_acc_train_acc = best_model.evaluate(train_ds, verbose=0)


      # store results
      search_results.append({
          'nb_filters': nb_filters,
          'nb_dense': nb_dense,
          'dropout': dropout,
          'best_val_acc_train_acc': best_val_acc_train_acc,
          'best_val_acc': best_val_acc,
          'best_val_acc_train_loss': best_val_acc_train_loss,
          'best_val_acc_loss': best_val_acc_loss,
          'best_val_epoch': best_val_epoch,
          'history': fit_results.history,
          'train_loss': fit_results.history['loss']
      })
Start training for (filters=24 - dense=4 - dropout=0.4)
Start training for (filters=24 - dense=4 - dropout=0.5)
Start training for (filters=24 - dense=4 - dropout=0.6)
Start training for (filters=24 - dense=8 - dropout=0.4)
Start training for (filters=24 - dense=8 - dropout=0.5)
Start training for (filters=24 - dense=8 - dropout=0.6)
Start training for (filters=24 - dense=12 - dropout=0.4)
Start training for (filters=24 - dense=12 - dropout=0.5)
Start training for (filters=24 - dense=12 - dropout=0.6)
Start training for (filters=24 - dense=16 - dropout=0.4)
Start training for (filters=24 - dense=16 - dropout=0.5)
Start training for (filters=24 - dense=16 - dropout=0.6)
Start training for (filters=24 - dense=20 - dropout=0.4)
Start training for (filters=24 - dense=20 - dropout=0.5)
Start training for (filters=24 - dense=20 - dropout=0.6)
Start training for (filters=24 - dense=24 - dropout=0.4)
Start training for (filters=24 - dense=24 - dropout=0.5)
Start training for (filters=24 - dense=24 - dropout=0.6)
Start training for (filters=36 - dense=4 - dropout=0.4)
Start training for (filters=36 - dense=4 - dropout=0.5)
Start training for (filters=36 - dense=4 - dropout=0.6)
Start training for (filters=36 - dense=8 - dropout=0.4)
Start training for (filters=36 - dense=8 - dropout=0.5)
Start training for (filters=36 - dense=8 - dropout=0.6)
Start training for (filters=36 - dense=12 - dropout=0.4)
Start training for (filters=36 - dense=12 - dropout=0.5)
Start training for (filters=36 - dense=12 - dropout=0.6)
Start training for (filters=36 - dense=16 - dropout=0.4)
Start training for (filters=36 - dense=16 - dropout=0.5)
Start training for (filters=36 - dense=16 - dropout=0.6)
Start training for (filters=36 - dense=20 - dropout=0.4)
Start training for (filters=36 - dense=20 - dropout=0.5)
Start training for (filters=36 - dense=20 - dropout=0.6)
Start training for (filters=36 - dense=24 - dropout=0.4)
Start training for (filters=36 - dense=24 - dropout=0.5)
Start training for (filters=36 - dense=24 - dropout=0.6)
Start training for (filters=48 - dense=4 - dropout=0.4)
Start training for (filters=48 - dense=4 - dropout=0.5)
Start training for (filters=48 - dense=4 - dropout=0.6)
Start training for (filters=48 - dense=8 - dropout=0.4)
Start training for (filters=48 - dense=8 - dropout=0.5)
Start training for (filters=48 - dense=8 - dropout=0.6)
Start training for (filters=48 - dense=12 - dropout=0.4)
Start training for (filters=48 - dense=12 - dropout=0.5)
Start training for (filters=48 - dense=12 - dropout=0.6)
Start training for (filters=48 - dense=16 - dropout=0.4)
Start training for (filters=48 - dense=16 - dropout=0.5)
Start training for (filters=48 - dense=16 - dropout=0.6)
Start training for (filters=48 - dense=20 - dropout=0.4)
Start training for (filters=48 - dense=20 - dropout=0.5)
Start training for (filters=48 - dense=20 - dropout=0.6)
Start training for (filters=48 - dense=24 - dropout=0.4)
Start training for (filters=48 - dense=24 - dropout=0.5)
Start training for (filters=48 - dense=24 - dropout=0.6)
Start training for (filters=60 - dense=4 - dropout=0.4)
Start training for (filters=60 - dense=4 - dropout=0.5)
Start training for (filters=60 - dense=4 - dropout=0.6)
Start training for (filters=60 - dense=8 - dropout=0.4)
Start training for (filters=60 - dense=8 - dropout=0.5)
Start training for (filters=60 - dense=8 - dropout=0.6)
Start training for (filters=60 - dense=12 - dropout=0.4)
Start training for (filters=60 - dense=12 - dropout=0.5)
Start training for (filters=60 - dense=12 - dropout=0.6)
Start training for (filters=60 - dense=16 - dropout=0.4)
Start training for (filters=60 - dense=16 - dropout=0.5)
Start training for (filters=60 - dense=16 - dropout=0.6)
Start training for (filters=60 - dense=20 - dropout=0.4)
Start training for (filters=60 - dense=20 - dropout=0.5)
Start training for (filters=60 - dense=20 - dropout=0.6)
Start training for (filters=60 - dense=24 - dropout=0.4)
Start training for (filters=60 - dense=24 - dropout=0.5)
Start training for (filters=60 - dense=24 - dropout=0.6)
Start training for (filters=72 - dense=4 - dropout=0.4)
Start training for (filters=72 - dense=4 - dropout=0.5)
Start training for (filters=72 - dense=4 - dropout=0.6)
Start training for (filters=72 - dense=8 - dropout=0.4)
Start training for (filters=72 - dense=8 - dropout=0.5)
Start training for (filters=72 - dense=8 - dropout=0.6)
Start training for (filters=72 - dense=12 - dropout=0.4)
Start training for (filters=72 - dense=12 - dropout=0.5)
Start training for (filters=72 - dense=12 - dropout=0.6)
Start training for (filters=72 - dense=16 - dropout=0.4)
Start training for (filters=72 - dense=16 - dropout=0.5)
Start training for (filters=72 - dense=16 - dropout=0.6)
Start training for (filters=72 - dense=20 - dropout=0.4)
Start training for (filters=72 - dense=20 - dropout=0.5)
Start training for (filters=72 - dense=20 - dropout=0.6)
Start training for (filters=72 - dense=24 - dropout=0.4)
Start training for (filters=72 - dense=24 - dropout=0.5)
Start training for (filters=72 - dense=24 - dropout=0.6)
In [ ]:
# Collect the grid-search records into a DataFrame and display them ranked
# by best validation accuracy (display only — resultsDF itself keeps the
# original grid-search row order).
resultsDF = pd.DataFrame(search_results)

resultsDF.sort_values(by='best_val_acc', ascending=False)
Out[ ]:
nb_filters nb_dense dropout best_val_acc_train_acc best_val_acc best_val_acc_train_loss best_val_acc_loss best_val_epoch history train_loss
52 48 24 0.5 0.999585 0.953488 0.008526 0.184757 28 {'loss': [0.48961323499679565, 0.3927376568317... [0.48961323499679565, 0.39273765683174133, 0.3...
26 36 12 0.6 0.985880 0.953488 0.052991 0.203555 20 {'loss': [0.5133034586906433, 0.42175251245498... [0.5133034586906433, 0.4217525124549866, 0.365...
77 72 8 0.6 0.998754 0.953488 0.010520 0.208419 24 {'loss': [0.5115669965744019, 0.42088097333908... [0.5115669965744019, 0.4208809733390808, 0.371...
29 36 16 0.6 0.995432 0.953488 0.027829 0.173891 29 {'loss': [0.5065136551856995, 0.41559809446334... [0.5065136551856995, 0.4155980944633484, 0.374...
40 48 8 0.5 0.996262 0.953488 0.020991 0.201622 24 {'loss': [0.5300318598747253, 0.45309635996818... [0.5300318598747253, 0.4530963599681854, 0.371...
... ... ... ... ... ... ... ... ... ... ...
0 24 4 0.4 0.978821 0.936877 0.099690 0.227430 26 {'loss': [0.5506211519241333, 0.49073529243469... [0.5506211519241333, 0.4907352924346924, 0.453...
38 48 4 0.6 0.959302 0.935216 0.096160 0.187134 26 {'loss': [0.5921011567115784, 0.50595074892044... [0.5921011567115784, 0.5059507489204407, 0.458...
6 24 12 0.4 0.981312 0.935216 0.053870 0.223632 17 {'loss': [0.5056522488594055, 0.38887330889701... [0.5056522488594055, 0.38887330889701843, 0.34...
18 36 4 0.4 0.990033 0.933555 0.066137 0.297967 27 {'loss': [0.5542446970939636, 0.50764954090118... [0.5542446970939636, 0.5076495409011841, 0.476...
5 24 8 0.6 0.961379 0.931894 0.101981 0.202954 22 {'loss': [0.534759521484375, 0.447866082191467... [0.534759521484375, 0.4478660821914673, 0.3857...

90 rows × 10 columns

In [ ]:
# Relative generalisation gap: how much the training accuracy exceeds the
# validation accuracy, normalised by the validation accuracy.
train_acc = resultsDF['best_val_acc_train_acc']
val_acc = resultsDF['best_val_acc']
resultsDF['delta_acc'] = (train_acc - val_acc) / val_acc
In [ ]:
# Regression pairplot: each hyperparameter against the accuracy metrics.
grid = sns.pairplot(
    resultsDF,
    x_vars=['nb_filters', 'nb_dense', 'dropout'],
    y_vars=['best_val_acc', 'best_val_acc_train_acc', 'delta_acc'],
    kind='reg',
    height=2,
)
# save through the grid handle so we target the pairplot's own figure
grid.savefig('plots/pairplot.pdf')
No description has been provided for this image
In [ ]:
# Part of solution for task 3

# The history object contains the per-epoch metrics; the 'train_loss' entry
# was added by our callback (normally Keras records just 'loss').
# search_results[0]['history'].keys()

# Indices of the three best models ranked by validation accuracy.
# NOTE: resultsDF itself is unsorted (the earlier sort was display-only), so
# we must sort before slicing — `resultsDF.index.values[:3]` would return
# grid rows 0-2, not the top performers.
top_3_indices = resultsDF.sort_values('best_val_acc', ascending=False).index.values[:3]
In [ ]:
# empty plots, just to get the legend entries
# empty artists so the legend explains the line styles
plt.plot([], [], 'k--', label='Training')
plt.plot([], [], 'k-', label='Validation')

print(resultsDF['history'][0].keys())

# loss curves of the three best-performing models (by validation accuracy)
top3 = resultsDF.sort_values('best_val_acc', ascending=False).head(3)
for color_idx, (_, row) in enumerate(top3.iterrows()):
  history = row['history']
  epochs = np.arange(1, len(history['loss']) + 1)
  label = (f"$n_{{\\mathrm{{filter}}}}=${row['nb_filters']}, "
           f"$n_{{\\mathrm{{dense}}}}=${row['nb_dense']}, "
           f"$do=${row['dropout']}")
  plt.plot(epochs, history['loss'], '--', color=f'C{color_idx}')
  plt.plot(epochs, history['val_loss'], '-', color=f'C{color_idx}')
  # invisible filled artist purely for a coloured legend entry
  plt.fill_between([], [], [], color=f'C{color_idx}', label=label)

plt.xlabel('Epochs')
plt.ylabel('Categorical crossentropy loss')
# frameless legend reads cleaner on a sparse plot
plt.legend(frameon=False)
dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])
Out[ ]:
<matplotlib.legend.Legend at 0x7fcad449e610>
No description has been provided for this image
In [ ]:
# Persist the full grid-search results; the integer index carries no
# information, so it is dropped.
resultsDF.to_csv('gridsearch_results.csv', index=False)
In [ ]:
# The three models with the smallest relative train/validation accuracy gap,
# i.e. the least-overfitting configurations.
resultsDF.sort_values('delta_acc', ascending=True).head(3)
Out[ ]:
nb_filters nb_dense dropout best_val_acc_train_acc best_val_acc best_val_acc_train_loss best_val_acc_loss best_val_epoch history train_loss delta_acc
2 24 4 0.6 0.948505 0.936877 0.142610 0.227296 25 {'loss': [0.5562707185745239, 0.49141937494277... [0.5562707185745239, 0.49141937494277954, 0.46... 0.012411
10 24 16 0.5 0.965116 0.946844 0.102133 0.185263 20 {'loss': [0.4974000155925751, 0.44332581758499... [0.4974000155925751, 0.44332581758499146, 0.36... 0.019298
38 48 4 0.6 0.959302 0.935216 0.096160 0.187134 26 {'loss': [0.5921011567115784, 0.50595074892044... [0.5921011567115784, 0.5059507489204407, 0.458... 0.025755
In [ ]:
# empty plots, just to get the legend entries
# empty artists so the legend explains the line styles
plt.plot([], [], 'k--', label='Training')
plt.plot([], [], 'k-', label='Validation')

# loss curves of the three models with the smallest generalisation gap
least_overfit = resultsDF.sort_values('delta_acc', ascending=True).head(3)
for color_idx, (_, row) in enumerate(least_overfit.iterrows()):
  history = row['history']
  epochs = np.arange(1, len(history['loss']) + 1)
  label = (f"$n_{{\\mathrm{{filter}}}}=${row['nb_filters']}, "
           f"$n_{{\\mathrm{{dense}}}}=${row['nb_dense']}, "
           f"$do=${row['dropout']}")
  plt.plot(epochs, history['loss'], '--', color=f'C{color_idx}')
  plt.plot(epochs, history['val_loss'], '-', color=f'C{color_idx}')
  # invisible filled artist purely for a coloured legend entry
  plt.fill_between([], [], [], color=f'C{color_idx}', label=label)

plt.xlabel('Epochs')
plt.ylabel('Categorical crossentropy loss')
# frameless legend reads cleaner on a sparse plot
plt.legend(frameon=False)
Out[ ]:
<matplotlib.legend.Legend at 0x7fcad555cf10>
No description has been provided for this image
In [ ]:
# Rebuild the selected configuration as a fresh model: three Conv/MaxPool
# stages followed by a dropout-regularised dense head
# (presumably the nb_dense=4 / dropout=0.6 combination from the grid search
# with the smallest generalisation gap — see the delta_acc ranking above).
best_model = tf.keras.Sequential([
    tf.keras.layers.Rescaling(1. / 255),                # map pixels to [0, 1]
    tf.keras.layers.Conv2D(24, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(12, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Conv2D(8, 3, activation='relu'),
    tf.keras.layers.MaxPooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.6),                       # regularise the dense head
    tf.keras.layers.Dense(4, activation='relu'),
    tf.keras.layers.Dense(2),                           # raw logits, two classes
])

# from_logits=True because the final Dense layer applies no softmax
best_model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.005),
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`.
WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
In [ ]:
# Train the final model; verbose=2 prints one summary line per epoch.
N_EPOCHS = 100

best_model_hist = best_model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=N_EPOCHS,
    verbose=2,
)
Epoch 1/100
76/76 - 17s - loss: 0.5415 - accuracy: 0.7554 - val_loss: 0.4171 - val_accuracy: 0.8040 - 17s/epoch - 229ms/step
Epoch 2/100
76/76 - 17s - loss: 0.3871 - accuracy: 0.8326 - val_loss: 0.3768 - val_accuracy: 0.8306 - 17s/epoch - 223ms/step
Epoch 3/100
76/76 - 17s - loss: 0.3538 - accuracy: 0.8509 - val_loss: 0.3128 - val_accuracy: 0.8704 - 17s/epoch - 222ms/step
Epoch 4/100
76/76 - 17s - loss: 0.3340 - accuracy: 0.8600 - val_loss: 0.3211 - val_accuracy: 0.8738 - 17s/epoch - 220ms/step
Epoch 5/100
76/76 - 16s - loss: 0.3007 - accuracy: 0.8654 - val_loss: 0.2762 - val_accuracy: 0.8937 - 16s/epoch - 216ms/step
Epoch 6/100
76/76 - 16s - loss: 0.2874 - accuracy: 0.8783 - val_loss: 0.3164 - val_accuracy: 0.8688 - 16s/epoch - 204ms/step
Epoch 7/100
76/76 - 15s - loss: 0.2848 - accuracy: 0.8733 - val_loss: 0.2787 - val_accuracy: 0.8804 - 15s/epoch - 196ms/step
Epoch 8/100
76/76 - 15s - loss: 0.2622 - accuracy: 0.8858 - val_loss: 0.2873 - val_accuracy: 0.8787 - 15s/epoch - 204ms/step
Epoch 9/100
76/76 - 17s - loss: 0.2467 - accuracy: 0.8958 - val_loss: 0.2648 - val_accuracy: 0.8887 - 17s/epoch - 218ms/step
Epoch 10/100
76/76 - 14s - loss: 0.2393 - accuracy: 0.9045 - val_loss: 0.2829 - val_accuracy: 0.8804 - 14s/epoch - 182ms/step
Epoch 11/100
76/76 - 14s - loss: 0.2428 - accuracy: 0.8978 - val_loss: 0.2526 - val_accuracy: 0.8953 - 14s/epoch - 187ms/step
Epoch 12/100
76/76 - 15s - loss: 0.2165 - accuracy: 0.9128 - val_loss: 0.2214 - val_accuracy: 0.9120 - 15s/epoch - 199ms/step
Epoch 13/100
76/76 - 17s - loss: 0.2197 - accuracy: 0.9049 - val_loss: 0.2258 - val_accuracy: 0.9120 - 17s/epoch - 223ms/step
Epoch 14/100
76/76 - 17s - loss: 0.2141 - accuracy: 0.9132 - val_loss: 0.2129 - val_accuracy: 0.9219 - 17s/epoch - 220ms/step
Epoch 15/100
76/76 - 17s - loss: 0.2043 - accuracy: 0.9203 - val_loss: 0.3349 - val_accuracy: 0.8555 - 17s/epoch - 226ms/step
Epoch 16/100
76/76 - 17s - loss: 0.2300 - accuracy: 0.8995 - val_loss: 0.2293 - val_accuracy: 0.9053 - 17s/epoch - 225ms/step
Epoch 17/100
76/76 - 16s - loss: 0.1872 - accuracy: 0.9277 - val_loss: 0.2377 - val_accuracy: 0.9103 - 16s/epoch - 211ms/step
Epoch 18/100
76/76 - 17s - loss: 0.1888 - accuracy: 0.9261 - val_loss: 0.2732 - val_accuracy: 0.8920 - 17s/epoch - 225ms/step
Epoch 19/100
76/76 - 17s - loss: 0.1820 - accuracy: 0.9282 - val_loss: 0.2225 - val_accuracy: 0.9153 - 17s/epoch - 223ms/step
Epoch 20/100
76/76 - 16s - loss: 0.1594 - accuracy: 0.9344 - val_loss: 0.2087 - val_accuracy: 0.9236 - 16s/epoch - 217ms/step
Epoch 21/100
76/76 - 16s - loss: 0.1544 - accuracy: 0.9390 - val_loss: 0.1936 - val_accuracy: 0.9336 - 16s/epoch - 209ms/step
Epoch 22/100
76/76 - 17s - loss: 0.1593 - accuracy: 0.9365 - val_loss: 0.2085 - val_accuracy: 0.9219 - 17s/epoch - 227ms/step
Epoch 23/100
76/76 - 16s - loss: 0.1626 - accuracy: 0.9352 - val_loss: 0.2028 - val_accuracy: 0.9269 - 16s/epoch - 206ms/step
Epoch 24/100
76/76 - 17s - loss: 0.1783 - accuracy: 0.9273 - val_loss: 0.1780 - val_accuracy: 0.9385 - 17s/epoch - 230ms/step
Epoch 25/100
76/76 - 16s - loss: 0.1688 - accuracy: 0.9323 - val_loss: 0.1927 - val_accuracy: 0.9203 - 16s/epoch - 214ms/step
Epoch 26/100
76/76 - 16s - loss: 0.1394 - accuracy: 0.9464 - val_loss: 0.1929 - val_accuracy: 0.9302 - 16s/epoch - 214ms/step
Epoch 27/100
76/76 - 15s - loss: 0.1420 - accuracy: 0.9402 - val_loss: 0.1962 - val_accuracy: 0.9269 - 15s/epoch - 196ms/step
Epoch 28/100
76/76 - 16s - loss: 0.1339 - accuracy: 0.9498 - val_loss: 0.1887 - val_accuracy: 0.9369 - 16s/epoch - 216ms/step
Epoch 29/100
76/76 - 17s - loss: 0.1427 - accuracy: 0.9394 - val_loss: 0.3047 - val_accuracy: 0.8953 - 17s/epoch - 227ms/step
Epoch 30/100
76/76 - 15s - loss: 0.1431 - accuracy: 0.9414 - val_loss: 0.1752 - val_accuracy: 0.9468 - 15s/epoch - 193ms/step
Epoch 31/100
76/76 - 16s - loss: 0.1152 - accuracy: 0.9572 - val_loss: 0.2411 - val_accuracy: 0.9336 - 16s/epoch - 212ms/step
Epoch 32/100
76/76 - 14s - loss: 0.1417 - accuracy: 0.9448 - val_loss: 0.1903 - val_accuracy: 0.9369 - 14s/epoch - 186ms/step
Epoch 33/100
76/76 - 14s - loss: 0.1071 - accuracy: 0.9601 - val_loss: 0.1680 - val_accuracy: 0.9419 - 14s/epoch - 186ms/step
Epoch 34/100
76/76 - 15s - loss: 0.1166 - accuracy: 0.9556 - val_loss: 0.2033 - val_accuracy: 0.9252 - 15s/epoch - 200ms/step
Epoch 35/100
76/76 - 15s - loss: 0.1347 - accuracy: 0.9502 - val_loss: 0.1781 - val_accuracy: 0.9302 - 15s/epoch - 202ms/step
Epoch 36/100
76/76 - 16s - loss: 0.1104 - accuracy: 0.9572 - val_loss: 0.1733 - val_accuracy: 0.9402 - 16s/epoch - 206ms/step
Epoch 37/100
76/76 - 17s - loss: 0.1254 - accuracy: 0.9531 - val_loss: 0.2208 - val_accuracy: 0.9336 - 17s/epoch - 218ms/step
Epoch 38/100
76/76 - 15s - loss: 0.1041 - accuracy: 0.9597 - val_loss: 0.1790 - val_accuracy: 0.9485 - 15s/epoch - 196ms/step
Epoch 39/100
76/76 - 16s - loss: 0.0946 - accuracy: 0.9655 - val_loss: 0.1798 - val_accuracy: 0.9502 - 16s/epoch - 211ms/step
Epoch 40/100
76/76 - 15s - loss: 0.1012 - accuracy: 0.9643 - val_loss: 0.1592 - val_accuracy: 0.9502 - 15s/epoch - 202ms/step
Epoch 41/100
76/76 - 15s - loss: 0.0918 - accuracy: 0.9659 - val_loss: 0.2179 - val_accuracy: 0.9402 - 15s/epoch - 197ms/step
Epoch 42/100
76/76 - 16s - loss: 0.0894 - accuracy: 0.9722 - val_loss: 0.1556 - val_accuracy: 0.9585 - 16s/epoch - 212ms/step
Epoch 43/100
76/76 - 16s - loss: 0.1121 - accuracy: 0.9551 - val_loss: 0.2255 - val_accuracy: 0.9269 - 16s/epoch - 208ms/step
Epoch 44/100
76/76 - 15s - loss: 0.1213 - accuracy: 0.9522 - val_loss: 0.1698 - val_accuracy: 0.9551 - 15s/epoch - 198ms/step
Epoch 45/100
76/76 - 15s - loss: 0.0888 - accuracy: 0.9697 - val_loss: 0.1836 - val_accuracy: 0.9452 - 15s/epoch - 195ms/step
Epoch 46/100
76/76 - 16s - loss: 0.0887 - accuracy: 0.9689 - val_loss: 0.1350 - val_accuracy: 0.9585 - 16s/epoch - 209ms/step
Epoch 47/100
76/76 - 16s - loss: 0.0971 - accuracy: 0.9635 - val_loss: 0.1350 - val_accuracy: 0.9485 - 16s/epoch - 210ms/step
Epoch 48/100
76/76 - 18s - loss: 0.0747 - accuracy: 0.9709 - val_loss: 0.2554 - val_accuracy: 0.9269 - 18s/epoch - 242ms/step
Epoch 49/100
76/76 - 18s - loss: 0.0769 - accuracy: 0.9709 - val_loss: 0.1550 - val_accuracy: 0.9585 - 18s/epoch - 237ms/step
Epoch 50/100
76/76 - 16s - loss: 0.1267 - accuracy: 0.9593 - val_loss: 0.1782 - val_accuracy: 0.9385 - 16s/epoch - 212ms/step
Epoch 51/100
76/76 - 16s - loss: 0.0913 - accuracy: 0.9680 - val_loss: 0.2932 - val_accuracy: 0.9302 - 16s/epoch - 214ms/step
Epoch 52/100
76/76 - 13s - loss: 0.0914 - accuracy: 0.9655 - val_loss: 0.1593 - val_accuracy: 0.9585 - 13s/epoch - 168ms/step
Epoch 53/100
76/76 - 17s - loss: 0.0785 - accuracy: 0.9726 - val_loss: 0.2050 - val_accuracy: 0.9485 - 17s/epoch - 219ms/step
Epoch 54/100
76/76 - 16s - loss: 0.0830 - accuracy: 0.9705 - val_loss: 0.2049 - val_accuracy: 0.9535 - 16s/epoch - 207ms/step
Epoch 55/100
76/76 - 16s - loss: 0.0831 - accuracy: 0.9705 - val_loss: 0.1584 - val_accuracy: 0.9535 - 16s/epoch - 216ms/step
Epoch 56/100
76/76 - 16s - loss: 0.0707 - accuracy: 0.9763 - val_loss: 0.1617 - val_accuracy: 0.9635 - 16s/epoch - 216ms/step
Epoch 57/100
76/76 - 16s - loss: 0.0703 - accuracy: 0.9759 - val_loss: 0.1483 - val_accuracy: 0.9585 - 16s/epoch - 210ms/step
Epoch 58/100
76/76 - 16s - loss: 0.0845 - accuracy: 0.9705 - val_loss: 0.1841 - val_accuracy: 0.9485 - 16s/epoch - 213ms/step
Epoch 59/100
76/76 - 16s - loss: 0.0998 - accuracy: 0.9643 - val_loss: 0.1372 - val_accuracy: 0.9568 - 16s/epoch - 217ms/step
Epoch 60/100
76/76 - 16s - loss: 0.0652 - accuracy: 0.9759 - val_loss: 0.1843 - val_accuracy: 0.9535 - 16s/epoch - 216ms/step
Epoch 61/100
76/76 - 16s - loss: 0.0653 - accuracy: 0.9784 - val_loss: 0.1337 - val_accuracy: 0.9535 - 16s/epoch - 215ms/step
Epoch 62/100
76/76 - 16s - loss: 0.0684 - accuracy: 0.9780 - val_loss: 0.1441 - val_accuracy: 0.9618 - 16s/epoch - 214ms/step
Epoch 63/100
76/76 - 16s - loss: 0.0616 - accuracy: 0.9805 - val_loss: 0.1402 - val_accuracy: 0.9551 - 16s/epoch - 215ms/step
Epoch 64/100
76/76 - 15s - loss: 0.0589 - accuracy: 0.9788 - val_loss: 0.1705 - val_accuracy: 0.9618 - 15s/epoch - 202ms/step
Epoch 65/100
76/76 - 16s - loss: 0.0697 - accuracy: 0.9743 - val_loss: 0.1747 - val_accuracy: 0.9601 - 16s/epoch - 214ms/step
Epoch 66/100
76/76 - 16s - loss: 0.0861 - accuracy: 0.9730 - val_loss: 0.1456 - val_accuracy: 0.9585 - 16s/epoch - 214ms/step
Epoch 67/100
76/76 - 15s - loss: 0.0732 - accuracy: 0.9738 - val_loss: 0.1213 - val_accuracy: 0.9668 - 15s/epoch - 196ms/step
Epoch 68/100
76/76 - 14s - loss: 0.0716 - accuracy: 0.9755 - val_loss: 0.1174 - val_accuracy: 0.9701 - 14s/epoch - 186ms/step
Epoch 69/100
76/76 - 16s - loss: 0.0669 - accuracy: 0.9797 - val_loss: 0.1388 - val_accuracy: 0.9684 - 16s/epoch - 213ms/step
Epoch 70/100
76/76 - 16s - loss: 0.0675 - accuracy: 0.9763 - val_loss: 0.1923 - val_accuracy: 0.9551 - 16s/epoch - 207ms/step
Epoch 71/100
76/76 - 16s - loss: 0.1137 - accuracy: 0.9589 - val_loss: 0.1445 - val_accuracy: 0.9651 - 16s/epoch - 207ms/step
Epoch 72/100
76/76 - 16s - loss: 0.0899 - accuracy: 0.9693 - val_loss: 0.1617 - val_accuracy: 0.9535 - 16s/epoch - 217ms/step
Epoch 73/100
76/76 - 15s - loss: 0.0567 - accuracy: 0.9797 - val_loss: 0.1743 - val_accuracy: 0.9485 - 15s/epoch - 196ms/step
Epoch 74/100
76/76 - 15s - loss: 0.0797 - accuracy: 0.9726 - val_loss: 0.1258 - val_accuracy: 0.9651 - 15s/epoch - 204ms/step
Epoch 75/100
76/76 - 13s - loss: 0.0857 - accuracy: 0.9701 - val_loss: 0.2392 - val_accuracy: 0.9551 - 13s/epoch - 174ms/step
Epoch 76/100
76/76 - 16s - loss: 0.0912 - accuracy: 0.9693 - val_loss: 0.1359 - val_accuracy: 0.9701 - 16s/epoch - 216ms/step
Epoch 77/100
76/76 - 16s - loss: 0.0640 - accuracy: 0.9797 - val_loss: 0.1384 - val_accuracy: 0.9601 - 16s/epoch - 212ms/step
Epoch 78/100
76/76 - 17s - loss: 0.0800 - accuracy: 0.9693 - val_loss: 0.1551 - val_accuracy: 0.9618 - 17s/epoch - 227ms/step
Epoch 79/100
76/76 - 15s - loss: 0.0571 - accuracy: 0.9813 - val_loss: 0.1663 - val_accuracy: 0.9618 - 15s/epoch - 199ms/step
Epoch 80/100
76/76 - 16s - loss: 0.0482 - accuracy: 0.9813 - val_loss: 0.1543 - val_accuracy: 0.9701 - 16s/epoch - 216ms/step
Epoch 81/100
76/76 - 17s - loss: 0.0588 - accuracy: 0.9792 - val_loss: 0.1239 - val_accuracy: 0.9701 - 17s/epoch - 219ms/step
Epoch 82/100
76/76 - 17s - loss: 0.0460 - accuracy: 0.9842 - val_loss: 0.1696 - val_accuracy: 0.9668 - 17s/epoch - 221ms/step
Epoch 83/100
76/76 - 14s - loss: 0.0470 - accuracy: 0.9830 - val_loss: 0.2891 - val_accuracy: 0.9485 - 14s/epoch - 187ms/step
Epoch 84/100
76/76 - 16s - loss: 0.0825 - accuracy: 0.9693 - val_loss: 0.1597 - val_accuracy: 0.9585 - 16s/epoch - 216ms/step
Epoch 85/100
76/76 - 16s - loss: 0.0517 - accuracy: 0.9817 - val_loss: 0.2136 - val_accuracy: 0.9568 - 16s/epoch - 216ms/step
Epoch 86/100
76/76 - 15s - loss: 0.0504 - accuracy: 0.9834 - val_loss: 0.1814 - val_accuracy: 0.9518 - 15s/epoch - 200ms/step
Epoch 87/100
76/76 - 13s - loss: 0.0526 - accuracy: 0.9821 - val_loss: 0.1680 - val_accuracy: 0.9618 - 13s/epoch - 178ms/step
Epoch 88/100
76/76 - 16s - loss: 0.0420 - accuracy: 0.9871 - val_loss: 0.2491 - val_accuracy: 0.9286 - 16s/epoch - 211ms/step
Epoch 89/100
76/76 - 14s - loss: 0.0536 - accuracy: 0.9792 - val_loss: 0.1871 - val_accuracy: 0.9618 - 14s/epoch - 179ms/step
Epoch 90/100
76/76 - 16s - loss: 0.0711 - accuracy: 0.9780 - val_loss: 0.1829 - val_accuracy: 0.9585 - 16s/epoch - 214ms/step
Epoch 91/100
76/76 - 16s - loss: 0.0688 - accuracy: 0.9784 - val_loss: 0.1982 - val_accuracy: 0.9568 - 16s/epoch - 206ms/step
Epoch 92/100
76/76 - 17s - loss: 0.0483 - accuracy: 0.9826 - val_loss: 0.1974 - val_accuracy: 0.9551 - 17s/epoch - 220ms/step
Epoch 93/100
76/76 - 16s - loss: 0.0744 - accuracy: 0.9805 - val_loss: 0.1405 - val_accuracy: 0.9551 - 16s/epoch - 209ms/step
Epoch 94/100
76/76 - 15s - loss: 0.0579 - accuracy: 0.9830 - val_loss: 0.1545 - val_accuracy: 0.9618 - 15s/epoch - 201ms/step
Epoch 95/100
76/76 - 16s - loss: 0.0542 - accuracy: 0.9805 - val_loss: 0.2258 - val_accuracy: 0.9535 - 16s/epoch - 215ms/step
Epoch 96/100
76/76 - 16s - loss: 0.0723 - accuracy: 0.9767 - val_loss: 0.1247 - val_accuracy: 0.9684 - 16s/epoch - 206ms/step
Epoch 97/100
76/76 - 15s - loss: 0.0638 - accuracy: 0.9792 - val_loss: 0.1473 - val_accuracy: 0.9635 - 15s/epoch - 192ms/step
Epoch 98/100
76/76 - 17s - loss: 0.0502 - accuracy: 0.9830 - val_loss: 0.1383 - val_accuracy: 0.9668 - 17s/epoch - 229ms/step
Epoch 99/100
76/76 - 17s - loss: 0.0734 - accuracy: 0.9772 - val_loss: 0.1141 - val_accuracy: 0.9684 - 17s/epoch - 218ms/step
Epoch 100/100
76/76 - 15s - loss: 0.0671 - accuracy: 0.9738 - val_loss: 0.1801 - val_accuracy: 0.9635 - 15s/epoch - 199ms/step
In [ ]:
# Persist the trained model (TensorFlow SavedModel directory format).
best_model.save('./models/best_model')
2024-07-20 17:51:41.913399: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,6272]
	 [[{{node inputs}}]]
2024-07-20 17:51:42.059944: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'inputs' with dtype float and shape [?,6272]
	 [[{{node inputs}}]]
WARNING:absl:Found untraced functions such as _jit_compiled_convolution_op, _jit_compiled_convolution_op, _jit_compiled_convolution_op, _update_step_xla while saving (showing 4 of 4). These functions will not be directly callable after loading.
INFO:tensorflow:Assets written to: ./models/best_model/assets
INFO:tensorflow:Assets written to: ./models/best_model/assets
In [ ]:
# Reload the saved model to verify the save/load round trip, then print the
# layer-by-layer architecture and parameter counts.
best_model = tf.keras.models.load_model("models/best_model")
best_model.summary()
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.RestoredOptimizer` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.RestoredOptimizer`.
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`.
WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
Model: "sequential_92"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 rescaling_92 (Rescaling)    (None, 240, 240, 3)       0         
                                                                 
 conv2d_276 (Conv2D)         (None, 238, 238, 24)      672       
                                                                 
 max_pooling2d_276 (MaxPooli  (None, 119, 119, 24)     0         
 ng2D)                                                           
                                                                 
 conv2d_277 (Conv2D)         (None, 117, 117, 12)      2604      
                                                                 
 max_pooling2d_277 (MaxPooli  (None, 58, 58, 12)       0         
 ng2D)                                                           
                                                                 
 conv2d_278 (Conv2D)         (None, 56, 56, 8)         872       
                                                                 
 max_pooling2d_278 (MaxPooli  (None, 28, 28, 8)        0         
 ng2D)                                                           
                                                                 
 flatten_92 (Flatten)        (None, 6272)              0         
                                                                 
 dropout_92 (Dropout)        (None, 6272)              0         
                                                                 
 dense_184 (Dense)           (None, 4)                 25092     
                                                                 
 dense_185 (Dense)           (None, 2)                 10        
                                                                 
=================================================================
Total params: 29,250
Trainable params: 29,250
Non-trainable params: 0
_________________________________________________________________
In [ ]:
# Training/validation curves for the final run.
# NOTE(review): plot_history is defined elsewhere in the notebook —
# presumably it plots loss/accuracy from the Keras History object; confirm.
plot_history(best_model_hist)
No description has been provided for this image
In [ ]:
# Reload the persisted model and evaluate it on the held-out test set.
best_model = tf.keras.models.load_model("models/best_model")

# predict() yields per-class logits; argmax over axis 1 gives the class label
logits = best_model.predict(test_ds)
best_model_pred = logits.argmax(axis=1)

full_evaluation(test_labels, best_model_pred, 'best_model')
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.RestoredOptimizer` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.RestoredOptimizer`.
WARNING:absl:At this time, the v2.11+ optimizer `tf.keras.optimizers.Adam` runs slowly on M1/M2 Macs, please use the legacy Keras optimizer instead, located at `tf.keras.optimizers.legacy.Adam`.
WARNING:absl:There is a known slowdown when using v2.11+ Keras optimizers on M1/M2 Macs. Falling back to the legacy Keras optimizer, i.e., `tf.keras.optimizers.legacy.Adam`.
24/24 [==============================] - 1s 60ms/step
Accuracy: 0.9694
              precision    recall  f1-score   support

         0.0       0.94      0.99      0.97       336
         1.0       0.99      0.95      0.97       416

    accuracy                           0.97       752
   macro avg       0.97      0.97      0.97       752
weighted avg       0.97      0.97      0.97       752

Coefficient of determination: 0.8763
Mean squared error: 0.0306
Mean absolute error: 0.0306
No description has been provided for this image
No description has been provided for this image
In [ ]:
# ROC curve for the best model on the test set.
# NOTE(review): roc_curve is fed the hard argmax labels, so the curve has
# only a single operating point; feeding predicted probabilities/scores for
# class 1 would trace the full curve — confirm whether this was intended.
fpr, tpr, thresholds = roc_curve(test_labels, best_model_pred)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.4f)' % roc_auc)
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic')
plt.legend(loc="lower right")
plt.savefig('plots/roc_curve_best_model.pdf')
No description has been provided for this image

Compare Recall scores¶

In [ ]:
def calc_recall_precision(test, predict):
    """Compute recall and precision for the positive class (label 1).

    Counts true positives, false negatives and false positives directly with
    numpy instead of indexing sklearn's confusion matrix: the original
    ``cm[1, 1]`` indexing raises IndexError whenever the inputs contain only
    a single class (the matrix collapses to 1x1). Results are identical for
    binary 0/1 inputs.

    Parameters
    ----------
    test : array-like of {0, 1}
        Ground-truth labels.
    predict : array-like of {0, 1}
        Predicted labels.

    Returns
    -------
    tuple of (float, float)
        ``(recall, precision)`` for class 1.
    """
    y_true = np.asarray(test).astype(int)
    y_pred = np.asarray(predict).astype(int)

    tp = int(np.sum((y_true == 1) & (y_pred == 1)))  # true positives
    fn = int(np.sum((y_true == 1) & (y_pred == 0)))  # missed positives
    fp = int(np.sum((y_true == 0) & (y_pred == 1)))  # false alarms

    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    return recall, precision


# Ground truth for the image test split: the first 336 samples are class 0,
# the remaining 416 are class 1 — matching the classification-report supports
# shown above (np.zeros already initialises class 0, no explicit assignment
# needed).
test_labels = np.zeros(752)
test_labels[336:] = 1
In [ ]:
compare = []

# The classical models were evaluated on the feature-based split (y_test),
# while the image-based CNN models use the directory-derived test_labels.
IMAGE_MODELS = {'CNN', 'best_model'}

for model, pred in zip(['SVM', 'LR', 'kNN', 'CNN', 'best_model'],
                       [svm_pred, lr_pred, knn_pred, cnn_pred, best_model_pred]):
    # pick the matching ground truth once, then compute every metric a single
    # time — the original duplicated the whole metric block in both branches
    labels = test_labels if model in IMAGE_MODELS else y_test

    accuracy = accuracy_score(labels, pred)
    recall, precision = calc_recall_precision(labels, pred)
    mse = mean_squared_error(labels, pred)
    mae = mean_absolute_error(labels, pred)
    r2 = r2_score(labels, pred)

    compare.append({'model': model,
                    'recall': recall,
                    'accuracy': accuracy,
                    'precision': precision,
                    # harmonic mean of precision and recall
                    'f1': 2 * (precision * recall) / (precision + recall),
                    'r2': r2,
                    'mse': mse,
                    'mae': mae})
In [ ]:
# Tabulate the per-model metrics and rank by recall (cf. the
# "Compare Recall scores" section heading above).
recallComp = pd.DataFrame(compare)
recallComp.sort_values('recall', ascending=False)
Out[ ]:
model recall accuracy precision f1 r2 mse mae
0 SVM 0.961652 0.980080 0.993902 0.977511 0.919520 0.019920 0.019920
1 LR 0.961652 0.978752 0.990881 0.976048 0.914155 0.021248 0.021248
4 best_model 0.951923 0.969415 0.992481 0.971779 0.876259 0.030585 0.030585
2 kNN 0.946903 0.976096 1.000000 0.972727 0.903424 0.023904 0.023904
3 CNN 0.872596 0.922872 0.986413 0.926020 0.687958 0.077128 0.077128
In [ ]:
# Export the model-comparison table; the integer index is dropped.
recallComp.to_csv('Comparison-AllModels.csv', index=False)
In [ ]: